##################################################
## Author: E. A. Harris, S. L. DeMora, D. Albarracin
## Project: The consequences of misinformation concern on media consumption
## Purpose: Cleaning File
## Date: 2024-06-10
##################################################

# NOTE: 
# Do not run this separately from the Replication Script. 
# The Replication Script will automatically call this cleaning script.

# ---------------------------------------------------------------------------------------------------------------
# ----------------------------- CLEANING SECTION START ----------------------------------------------------------
# ---------------------------------------------------------------------------------------------------------------
# Silence warnings while the cleaning steps run; the final statement of this
# script sets the warning level back to 0.
options(warn = -1)

# Study 1: keep only respondents whose `Finished` flag equals "TRUE".
# `do1` holds the filtered raw data; `d1` is the working copy that the
# recodes below modify.
do1 <- filter(do1, Finished == "TRUE")
d1 <- do1

# Study 2: same completion filter and working copy.
do2 <- filter(do2, Finished == "TRUE")
d2 <- do2

# ---- Study 1: respondent-level recodes ----

# Gender: lower-case the two binary labels. The two remaining response options
# are mapped to a single blank (" ") that is then converted to NA, so they do
# not become factor levels. "male" is the reference level.
d1$gender <- recode(d1$gender,
                    "Female" = "female",
                    "Male" = "male",
                    "Non-binary / third gender" = " ",
                    "Other" = " ")
d1 <- mutate(d1, gender = na_if(gender, " "))
d1$gender <- relevel(as.factor(d1$gender), ref = "male")

# Age: go through character first so the stored values (not factor codes)
# are what gets converted to numbers.
d1$age <- as.numeric(as.character(d1$age))

# Education: keep the original response labels in `education.f`, then
# collapse `education` into five broader categories.
d1$education.f <- d1$education
d1$education <- recode(
  d1$education,
  "Associate degree in college (2-year)" = "associate or some college",
  "Some college but no degree" = "associate or some college",
  "Bachelor's degree in college (4-year)" = "college",
  "Master's degree" = "more than college",
  "Doctoral degree" = "more than college",
  "Professional degree (JD, MD)" = "more than college",
  "High school graduate (high school diploma or equivalent including GED)" = "high school",
  "Less than high school degree" = "less than high school"
)

# Party affiliation: "Other Party" becomes NA (via the blank placeholder).
# `PA.f` uses Democrats as the reference level, `PA.f.R` uses Republicans.
d1$PA <- recode(d1$PA, "Other Party" = " ")
d1 <- mutate(d1, PA = na_if(PA, " "))
d1$PA.f <- relevel(as.factor(d1$PA), ref = "Democratic Party")
d1$PA.f.R <- relevel(d1$PA.f, ref = "Republican Party")

# Two-party subsample: Democrats and Republicans only (rows with a missing
# `PA` are dropped as well), with unused factor levels removed.
d.DR1 <- droplevels(
  subset(d1, PA %in% c("Democratic Party", "Republican Party"))
)

# `concern` centered on its mean within the two-party subsample.
d.DR1$concern.c <- d.DR1$concern - mean(d.DR1$concern, na.rm = TRUE)

# Numeric education score (1 = less than high school ... 5 = more than
# college). Note that `education` itself is overwritten with these numeric
# codes; `educnum` is a copy of the recoded column.
d.DR1$education <- recode(d.DR1$education,
                          "associate or some college" = 3,
                          "college" = 4,
                          "more than college" = 5,
                          "high school" = 2,
                          "less than high school" = 1)
d.DR1$educnum <- d.DR1$education
# ---- Study 2: respondent-level recodes (same steps as Study 1) ----

# Gender: lower-case the two binary labels. The two remaining response options
# are mapped to a single blank (" ") that is then converted to NA, so they do
# not become factor levels. "male" is the reference level.
d2$gender <- recode(d2$gender,
                    "Female" = "female",
                    "Male" = "male",
                    "Non-binary / third gender" = " ",
                    "Other" = " ")
d2 <- mutate(d2, gender = na_if(gender, " "))
d2$gender <- relevel(as.factor(d2$gender), ref = "male")

# Age: go through character first so the stored values (not factor codes)
# are what gets converted to numbers.
d2$age <- as.numeric(as.character(d2$age))

# Education: keep the original response labels in `education.f`, then
# collapse `education` into five broader categories.
d2$education.f <- d2$education
d2$education <- recode(
  d2$education,
  "Associate degree in college (2-year)" = "associate or some college",
  "Some college but no degree" = "associate or some college",
  "Bachelor's degree in college (4-year)" = "college",
  "Master's degree" = "more than college",
  "Doctoral degree" = "more than college",
  "Professional degree (JD, MD)" = "more than college",
  "High school graduate (high school diploma or equivalent including GED)" = "high school",
  "Less than high school degree" = "less than high school"
)

# Party affiliation: "Other Party" becomes NA (via the blank placeholder).
# `PA.f` uses Democrats as the reference level, `PA.f.R` uses Republicans.
d2$PA <- recode(d2$PA, "Other Party" = " ")
d2 <- mutate(d2, PA = na_if(PA, " "))
d2$PA.f <- relevel(as.factor(d2$PA), ref = "Democratic Party")
d2$PA.f.R <- relevel(d2$PA.f, ref = "Republican Party")

# Two-party subsample: Democrats and Republicans only (rows with a missing
# `PA` are dropped as well), with unused factor levels removed.
d.DR2 <- droplevels(
  subset(d2, PA %in% c("Democratic Party", "Republican Party"))
)

# `concern` centered on its mean within the two-party subsample.
d.DR2$concern.c <- d.DR2$concern - mean(d.DR2$concern, na.rm = TRUE)

# Numeric education score (1 = less than high school ... 5 = more than
# college). Note that `education` itself is overwritten with these numeric
# codes; `educnum` is a copy of the recoded column.
d.DR2$education <- recode(d.DR2$education,
                          "associate or some college" = 3,
                          "college" = 4,
                          "more than college" = 5,
                          "high school" = 2,
                          "less than high school" = 1)
d.DR2$educnum <- d.DR2$education


# ---- Study 3: build the two stacked analysis data sets from `do3` ----
#
# Each data set stacks two "sets" of rows taken from different wave columns
# of `do3` (suffixes _W1 ... _W4):
#   * do3.1: `concern` from W1 / W2, Q119 items from W3 / W4
#   * do3.2: Q119 items from W1 / W2, `concern` from W3 / W4
# Demographics and party come from W1 (set 1) or W2 (set 2) in both cases.
# `ID` is the row position within each set, so the same `ID` appears once
# per set after stacking.

# Data set 3.1, set 1: W1 covariates and concern, W3 Q119 items.
do3.1.1 <- data.frame(
  gender    = do3$PGENDER_W1,
  PO        = do3$PPOLVIEW_W1,
  agegroup  = do3$PAGEFINAL_W1,
  education = do3$EDUCATION_W1,
  Q119_1    = do3$Q119_1_W3,
  Q119_2    = do3$Q119_2_W3,
  Q119_3    = do3$Q119_3_W3,
  Q119_4    = do3$Q119_4_W3,
  Q119_5    = do3$Q119_5_W3,
  Q119_6    = do3$Q119_6_W3,
  Q119_7    = do3$Q119_7_W3,
  Q119_8    = do3$Q119_8_W3,
  Q119_9    = do3$Q119_9_W3,
  Q119_10   = do3$Q119_10_W3,
  PA        = do3$PPOLPARTY_W1,
  concern   = do3$Q68_W1,
  set       = 1
)
# Data set 3.1, set 2: W2 covariates and concern, W4 Q119 items.
do3.1.2 <- data.frame(
  gender    = do3$PGENDER_W2,
  PO        = do3$PPOLVIEW_W2,
  agegroup  = do3$PAGEFINAL_W2,
  education = do3$EDUCATION_W2,
  Q119_1    = do3$Q119_1_W4,
  Q119_2    = do3$Q119_2_W4,
  Q119_3    = do3$Q119_3_W4,
  Q119_4    = do3$Q119_4_W4,
  Q119_5    = do3$Q119_5_W4,
  Q119_6    = do3$Q119_6_W4,
  Q119_7    = do3$Q119_7_W4,
  Q119_8    = do3$Q119_8_W4,
  Q119_9    = do3$Q119_9_W4,
  Q119_10   = do3$Q119_10_W4,
  PA        = do3$PPOLPARTY_W2,
  concern   = do3$Q68_W2,
  set       = 2
)
do3.1.1$ID <- 1:nrow(do3.1.1)
do3.1.2$ID <- 1:nrow(do3.1.2)
do3.1 <- rbind(do3.1.1, do3.1.2)
# Drop rows with party code 3; `which()` also drops rows where PA is NA.
do3.1 <- do3.1[which(do3.1$PA != 3), ]
# Codes 998 and 999 in any numeric column are set to NA.
do3.1 <- do3.1 %>%
  replace_with_na_if(.predicate = is.numeric,
                     condition = ~.x == 998) %>%
  replace_with_na_if(.predicate = is.numeric,
                     condition = ~.x == 999)

# Data set 3.2, set 1: W1 covariates and Q119 items, W3 concern.
do3.2.1 <- data.frame(
  gender    = do3$PGENDER_W1,
  PO        = do3$PPOLVIEW_W1,
  agegroup  = do3$PAGEFINAL_W1,
  education = do3$EDUCATION_W1,
  Q119_1    = do3$Q119_1_W1,
  Q119_2    = do3$Q119_2_W1,
  Q119_3    = do3$Q119_3_W1,
  Q119_4    = do3$Q119_4_W1,
  Q119_5    = do3$Q119_5_W1,
  Q119_6    = do3$Q119_6_W1,
  Q119_7    = do3$Q119_7_W1,
  Q119_8    = do3$Q119_8_W1,
  Q119_9    = do3$Q119_9_W1,
  Q119_10   = do3$Q119_10_W1,
  PA        = do3$PPOLPARTY_W1,
  concern   = do3$Q68_W3,
  set       = 1
)
# Data set 3.2, set 2: W2 covariates and Q119 items, W4 concern.
do3.2.2 <- data.frame(
  gender    = do3$PGENDER_W2,
  PO        = do3$PPOLVIEW_W2,
  agegroup  = do3$PAGEFINAL_W2,
  education = do3$EDUCATION_W2,
  Q119_1    = do3$Q119_1_W2,
  Q119_2    = do3$Q119_2_W2,
  Q119_3    = do3$Q119_3_W2,
  Q119_4    = do3$Q119_4_W2,
  Q119_5    = do3$Q119_5_W2,
  Q119_6    = do3$Q119_6_W2,
  Q119_7    = do3$Q119_7_W2,
  Q119_8    = do3$Q119_8_W2,
  Q119_9    = do3$Q119_9_W2,
  Q119_10   = do3$Q119_10_W2,
  PA        = do3$PPOLPARTY_W2,
  concern   = do3$Q68_W4,
  set       = 2
)
do3.2.1$ID <- 1:nrow(do3.2.1)
do3.2.2$ID <- 1:nrow(do3.2.2)
do3.2 <- rbind(do3.2.1, do3.2.2)
# Drop rows with party code 3; `which()` also drops rows where PA is NA.
do3.2 <- do3.2[which(do3.2$PA != 3), ]
# Codes 998 and 999 in any numeric column are set to NA.
do3.2 <- do3.2 %>%
  replace_with_na_if(.predicate = is.numeric,
                     condition = ~.x == 998) %>%
  replace_with_na_if(.predicate = is.numeric,
                     condition = ~.x == 999)

# Working copies used by the recodes that follow.
d3.1 <- do3.1
d3.2 <- do3.2

# ---- Study 3: respondent-level recodes for d3.1 and d3.2 ----

# Gender: numeric codes become labels (1 = Male, 2 = Female); code 3 is
# mapped to a blank placeholder and then to NA. "Male" is the reference level.
d3.1$gender <- as.character(as.numeric(d3.1$gender))
d3.1$gender <- recode(d3.1$gender,
                      "2" = "Female",
                      "1" = "Male",
                      "3" = " ")
d3.1 <- mutate(d3.1, gender = na_if(gender, " "))
d3.1$gender <- relevel(as.factor(d3.1$gender), ref = "Male")

d3.2$gender <- as.character(as.numeric(d3.2$gender))
d3.2$gender <- recode(d3.2$gender,
                      "2" = "Female",
                      "1" = "Male",
                      "3" = " ")
d3.2 <- mutate(d3.2, gender = na_if(gender, " "))
d3.2$gender <- relevel(as.factor(d3.2$gender), ref = "Male")

# Education, age group and concern are kept as numeric codes.
d3.1$education <- as.numeric(d3.1$education)
d3.2$education <- as.numeric(d3.2$education)
d3.1$agegroup <- as.numeric(d3.1$agegroup)
d3.2$agegroup <- as.numeric(d3.2$agegroup)
d3.1$concern <- as.numeric(d3.1$concern)
d3.2$concern <- as.numeric(d3.2$concern)

# Party: code 1 = Republican, code 2 = Democrat; `PA.f` is the factor version.
d3.1$PA <- recode(as.character(d3.1$PA),
                  "2" = "Democrat",
                  "1" = "Republican")
d3.1$PA.f <- as.factor(d3.1$PA)
d3.2$PA <- recode(as.character(d3.2$PA),
                  "2" = "Democrat",
                  "1" = "Republican")
d3.2$PA.f <- as.factor(d3.2$PA)

# The ten Q119 items (Q119_1 ... Q119_10) are coerced to numeric.
d3.1 <- mutate_at(d3.1, paste0("Q119_", 1:10), as.numeric)
d3.2 <- mutate_at(d3.2, paste0("Q119_", 1:10), as.numeric)

# Party factor for d3.1 with Republicans as the reference level.
d3.1$PA.f.R <- relevel(d3.1$PA.f, ref = "Republican")

# ---- Study 1: long format (one row per respondent x getinfo item) ----

# Respondent identifier within the two-party subsample.
d.DR1$masterid <- seq_len(nrow(d.DR1))

# Stack every column whose name starts with "getinfo" into an item/value pair,
# keeping the respondent-level covariates alongside.
d_long1 <- d.DR1 %>%
  dplyr::select(concern.c, PA.f, gender, age, education,
                starts_with("getinfo"), masterid) %>%
  gather(item, getinfo, starts_with("getinfo")) %>%
  bind_cols()

# Items excluded from the Study 1 long data.
d_long1 <- d_long1 %>%
  filter(!(item %in% c("getinfo_DO", "getinfo_lib", "getinfo_con",
                       "getinfo_3", "getinfo_4", "getinfo_5", "getinfo_6",
                       "getinfo_10", "getinfo_15", "getinfo_16",
                       "getinfo_18")))

# Five-category lean of each item. Items without a lean are given the empty
# string "" as a placeholder (some entries refer to items already removed by
# the filter above and therefore never match).
d_long1$medialean5 <- recode(d_long1$item,
                             "getinfo_7" = "Conservative",
                             "getinfo_17" = "Conservative-leaning",
                             "getinfo_13" = "Conservative-leaning",
                             "getinfo_10" = "Center",
                             "getinfo_15" = "Center",
                             "getinfo_16" = "Center",
                             "getinfo_9" = "Center",
                             "getinfo_11" = "Liberal-leaning",
                             "getinfo_12" = "Liberal-leaning",
                             "getinfo_14" = "Liberal-leaning",
                             "getinfo_8" = "Liberal",
                             "getinfo_1" = "",
                             "getinfo_2" = "",
                             "getinfo_3" = "",
                             "getinfo_4" = "",
                             "getinfo_5" = "",
                             "getinfo_6" = "")

# Turn the placeholder into NA. The placeholder written by the recode is the
# empty string "", so that is the value that has to be matched here; matching
# a single blank " " instead leaves "" in place and it ends up as a spurious
# factor level of `medialean5` (and of `medialean.lib` below).
d_long1 <- d_long1 %>%
  mutate(medialean5 = na_if(medialean5, ""))
d_long1$medialean5 <- as.factor(d_long1$medialean5)

# Three-category lean with "Mainstream" (the former "Center") as the first
# level; anything outside the three listed levels becomes NA.
d_long1$medialean5b <- car::recode(d_long1$medialean5, "c('Liberal', 'Liberal-leaning') = 'Liberal'; c('Conservative-leaning', 'Conservative') = 'Conservative'; 'Center' = 'Mainstream'")
d_long1$medialean5b <- factor(d_long1$medialean5b,
                              levels = c("Mainstream", "Conservative", "Liberal"))

# Item responses as numbers (non-numeric entries become NA).
d_long1$getinfo <- as.numeric(as.character(d_long1$getinfo))

# Alternative reference levels: Republicans for party, "Liberal-leaning" for
# the five-category lean.
d_long1$PA.f.R <- relevel(d_long1$PA.f, ref = "Republican Party")
d_long1$medialean.lib <- relevel(d_long1$medialean5, ref = "Liberal-leaning")

# ---- Study 2: long format (one row per respondent x getinfo item) ----

# Respondent identifier within the two-party subsample.
d.DR2$masterid <- seq_len(nrow(d.DR2))

# Stack every column whose name starts with "getinfo" into an item/value pair,
# keeping the respondent-level covariates (including `effcond`) alongside.
d_long2 <- d.DR2 %>%
  dplyr::select(concern.c, concern, effcond, PA.f, gender, age, education,
                starts_with("getinfo"), masterid) %>%
  gather(item, getinfo, starts_with("getinfo")) %>%
  bind_cols()

# Five-category lean of each item. Items without a lean are given the empty
# string "" as a placeholder; items not listed keep their column name.
d_long2$medialean5 <- recode(d_long2$item,
                             "getinfo_7" = "Conservative",
                             "getinfo_17" = "Conservative-leaning",
                             "getinfo_13" = "Conservative-leaning",
                             "getinfo_10" = "Center",
                             "getinfo_15" = "Center",
                             "getinfo_16" = "Center",
                             "getinfo_9" = "Center",
                             "getinfo_11" = "Liberal-leaning",
                             "getinfo_12" = "Liberal-leaning",
                             "getinfo_14" = "Liberal-leaning",
                             "getinfo_8" = "Liberal",
                             "getinfo_1" = "",
                             "getinfo_2" = "",
                             "getinfo_3" = "",
                             "getinfo_4" = "",
                             "getinfo_5" = "",
                             "getinfo_6" = "",
                             "getinfo_DO" = "")

# Per-item quality score, stored as text; "" marks items without a score and
# items not listed keep their column name.
d_long2$mediaquality <- recode(d_long2$item,
                               "getinfo_7" = "0.39",
                               "getinfo_17" = "0.53",
                               "getinfo_13" = "0.80",
                               "getinfo_10" = "",
                               "getinfo_15" = "",
                               "getinfo_16" = "",
                               "getinfo_9" = "0.86",
                               "getinfo_11" = "0.93",
                               "getinfo_12" = "0.84",
                               "getinfo_14" = "0.66",
                               "getinfo_8" = "0.66",
                               "getinfo_1" = "",
                               "getinfo_2" = "",
                               "getinfo_3" = "",
                               "getinfo_4" = "",
                               "getinfo_5" = "",
                               "getinfo_6" = "")

# Turn the lean placeholder into NA. The placeholder written by the recode is
# the empty string "", so that is the value that has to be matched here;
# matching a single blank " " instead leaves "" in place and it ends up as a
# spurious factor level of `medialean5` (and of `medialean.lib` below).
d_long2 <- d_long2 %>%
  mutate(medialean5 = na_if(medialean5, ""))
d_long2$medialean5 <- as.factor(d_long2$medialean5)

# Three-category lean with "Mainstream" (the former "Center") as the first
# level; anything outside the three listed levels becomes NA.
d_long2$medialean5b <- car::recode(d_long2$medialean5, "c('Liberal', 'Liberal-leaning') = 'Liberal'; c('Conservative-leaning', 'Conservative') = 'Conservative'; 'Center' = 'Mainstream'")
d_long2$medialean5b <- factor(d_long2$medialean5b,
                              levels = c("Mainstream", "Conservative", "Liberal"))

# Item responses as numbers (non-numeric entries become NA).
d_long2$getinfo <- as.numeric(as.character(d_long2$getinfo))

# Effect-coded condition: 0 -> -0.5, 1 -> 0.5.
d_long2$effcond.n <- recode(d_long2$effcond,
                            "0" = -0.5,
                            "1" = 0.5)

# Alternative reference levels: Republicans for party, "Liberal-leaning" for
# the five-category lean.
d_long2$PA.f.R <- relevel(d_long2$PA.f, ref = "Republican Party")
d_long2$medialean.lib <- relevel(d_long2$medialean5, ref = "Liberal-leaning")

# NOTE(review): none of the `medialean5` levels produced above is named "con"
# or "lib" (levels are either the lean labels or column names starting with
# "getinfo"), so this recode appears to change nothing. Kept as-is; confirm
# whether "getinfo_con" / "getinfo_lib" were meant.
d_long2$medialean5 <- recode(d_long2$medialean5,
                             "con" = "Conservative",
                             "lib" = "Liberal")

# ---- Study 3, data set 3.1: long format (one row per row x Q119 item) ----

# Row position in each stacked Study 3 data set (unlike `ID`, this does not
# restart for the second set).
d3.1$index <- 1:nrow(d3.1)
d3.2$index <- 1:nrow(d3.2)

# Stack the Q119 columns into an item/value pair, keeping covariates and ids.
d_long3.1 <- d3.1 %>%
  dplyr::select(concern, PA.f, PA.f.R, gender, agegroup, education,
                starts_with("Q119"), ID, index, set) %>%
  gather(item, mediause, starts_with("Q119")) %>%
  bind_cols()

# Lean category assigned to each Q119 item.
d_long3.1$medialean5 <- recode(d_long3.1$item,
                               "Q119_1" = "Conservative-leaning",
                               "Q119_7" = "Conservative-leaning",
                               "Q119_10" = "Center",
                               "Q119_9" = "Center",
                               "Q119_4" = "Center",
                               "Q119_3" = "Center",
                               "Q119_5" = "Liberal-leaning",
                               "Q119_6" = "Liberal-leaning",
                               "Q119_8" = "Liberal-leaning",
                               "Q119_2" = "Liberal")

# Per-item quality score, stored as text; "" marks items without a score.
d_long3.1$mediaquality <- recode(d_long3.1$item,
                                 "Q119_1" = "0.43",
                                 "Q119_7" = "0.80",
                                 "Q119_10" = "",
                                 "Q119_9" = "",
                                 "Q119_4" = "",
                                 "Q119_3" = "0.86",
                                 "Q119_5" = "0.93",
                                 "Q119_6" = "0.84",
                                 "Q119_8" = "0.66",
                                 "Q119_2" = "0.66")

# Blank (" ") lean values would become NA here; the recode above does not
# produce any, so this step leaves the column unchanged.
d_long3.1$medialean5 <- na_if(d_long3.1$medialean5, " ")
d_long3.1$medialean5 <- as.factor(d_long3.1$medialean5)

# Three-category lean with "Mainstream" (the former "Center") as the first
# level; anything outside the three listed levels becomes NA.
d_long3.1$medialean5b <- car::recode(d_long3.1$medialean5, "c('Liberal', 'Liberal-leaning') = 'Liberal'; c('Conservative-leaning', 'Conservative') = 'Conservative'; 'Center' = 'Mainstream'")
d_long3.1$medialean5b <- factor(d_long3.1$medialean5b,
                                levels = c("Mainstream", "Conservative", "Liberal"))


# ---- Study 3, data set 3.2: long format (one row per row x Q119 item) ----

# Party factor for d3.2 with Republicans as the reference level.
d3.2$PA.f.R <- relevel(d3.2$PA.f, ref = "Republican")

# Stack the Q119 columns into an item/value pair, keeping covariates and ids.
d_long3.2 <- d3.2 %>%
  dplyr::select(concern, PA.f, PA.f.R, gender, agegroup, education,
                starts_with("Q119"), ID, index, set) %>%
  gather(item, mediause, starts_with("Q119")) %>%
  bind_cols()

# Lean category assigned to each Q119 item.
d_long3.2$medialean5 <- recode(d_long3.2$item,
                               "Q119_1" = "Conservative-leaning",
                               "Q119_7" = "Conservative-leaning",
                               "Q119_10" = "Center",
                               "Q119_9" = "Center",
                               "Q119_4" = "Center",
                               "Q119_3" = "Center",
                               "Q119_5" = "Liberal-leaning",
                               "Q119_6" = "Liberal-leaning",
                               "Q119_8" = "Liberal-leaning",
                               "Q119_2" = "Liberal")

# Blank (" ") lean values would become NA here; the recode above does not
# produce any, so this step leaves the column unchanged.
d_long3.2$medialean5 <- na_if(d_long3.2$medialean5, " ")
d_long3.2$medialean5 <- as.factor(d_long3.2$medialean5)

# Three-category lean with "Mainstream" (the former "Center") as the first
# level; anything outside the three listed levels becomes NA.
d_long3.2$medialean5b <- car::recode(d_long3.2$medialean5, "c('Liberal', 'Liberal-leaning') = 'Liberal'; c('Conservative-leaning', 'Conservative') = 'Conservative'; 'Center' = 'Mainstream'")
d_long3.2$medialean5b <- factor(d_long3.2$medialean5b,
                                levels = c("Mainstream", "Conservative", "Liberal"))

# Item responses as numbers in both Study 3 long data sets.
d_long3.2$mediause <- as.numeric(as.character(d_long3.2$mediause))
d_long3.1$mediause <- as.numeric(as.character(d_long3.1$mediause))



# ---- Study 1 two-party subsample: labelled demographic brackets ----

# Capitalised gender labels.
d.DR1$Gender <- car::recode(d.DR1$gender, "'male' = 'Male'; 'female' = 'Female'")

# Age brackets. The top bracket is open-ended (upper break = Inf) so that it
# matches its "61+" label; with a finite upper break of 85, respondents older
# than 85 fell outside every interval and were silently set to NA.
d.DR1 <- d.DR1 %>%
  mutate(
    Age = cut(age,
              breaks = c(17, 30, 45, 60, Inf),
              labels = c("18-30", "31-45", "46-60", "61+"),
              include.lowest = TRUE)
  )

# Education brackets. At this point `education` holds the 1-5 codes written
# when `educnum` was created (1 = less than high school, 2 = high school,
# 3 = associate or some college, 4 = college, 5 = more than college), so the
# brackets are keyed to that scale. Keying them to a 1-8 scale labelled
# "college" as "Some College", "more than college" as "4-year Degree", and
# never assigned "Graduate Degree".
d.DR1 <- d.DR1 %>%
  mutate(
    Education = case_when(
      education %in% c(1, 2) ~ "HS or less",
      education == 3 ~ "Some College",
      education == 4 ~ "4-year Degree",
      education == 5 ~ "Graduate Degree"
    )
  )

# Ordered from lowest to highest attainment.
d.DR1$Education <- factor(d.DR1$Education,
                          levels = c("HS or less", "Some College",
                                     "4-year Degree", "Graduate Degree"))

# Indicator for female respondents (Female = 1, Male = 0).
d.DR1$Female <- car::recode(d.DR1$Gender, "'Female' = 1; 'Male' = 0")

# Companion data set that drops the bracketed Age/Education columns and
# exposes the underlying numeric `age` / `education` and the party factor
# under capitalised names.
d.DR1_numeric <- d.DR1 %>%
  dplyr::select(-Age, -Education) %>%
  rename(Age = age,
         Education = education,
         Party = PA.f)

d.DR1_numeric$Gender <- factor(d.DR1_numeric$Gender, levels = c("Male", "Female"))

# Warnings were switched off at the start of this script; turn them back on.
options(warn = 0)

